1. load packages & libraries
#install.packages("dplyr") # new  version
#install.packages(c("snakecase", "janitor")) #tidy names
#install.packages("pillar") #Dependency for patchwork
#devtools::install_github("thomasp85/patchwork") #For multi-panel plots
#install.packages(c("osmdata", "ggmap"))
#install.packages(c("sp", "sf"))

library(tidyverse) 
## ── Attaching packages ────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.0     ✔ purrr   0.2.5
## ✔ tibble  2.1.3     ✔ dplyr   0.8.1
## ✔ tidyr   0.8.1     ✔ stringr 1.3.1
## ✔ readr   1.1.1     ✔ forcats 0.3.0
## ── Conflicts ───────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(janitor)   #Clean names
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(patchwork) #Multi panel plots
library(osmdata)   #OSM
## Data (c) OpenStreetMap contributors, ODbL 1.0. http://www.openstreetmap.org/copyright
library(ggmap)     #Tidy gg maps
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
library(purrr)     #Map functions
library(sp)        #Spatial forms
library(sf)        #Spatial forms
## Linking to GEOS 3.5.1, GDAL 2.1.2, PROJ 4.9.3

DEFINING FUNCTIONS

  1. Define the function for finding points
#Fairfax read OSM to output tibble (tidyr df)

osm_to_df <- function(key = "leisure", value = "playground", type = "point") {
#Read in playground data
read.df <- getbb("fairfax county", format_out = "polygon") %>%
  opq() %>%
  add_osm_feature(key = key, value = value) %>%
  osmdata_sf()
    
    if (type == "point" & !is.null(read.df$osm_points)) {
      
      lat.long.df <- do.call(rbind, st_geometry(read.df$osm_points)) %>% 
        as_tibble() %>% 
        setNames(c("longitude", "latitude")) %>%
        mutate(object_id = 1:nrow(.) %>% as.factor()) %>%
        dplyr::select(object_id, everything())
      
    } else if (type == "polygon" & !is.null(read.df$osm_polygons)) {
      
      poly.list <- do.call(rbind, st_geometry(read.df$osm_polygons)) %>%
        lapply(as.tibble)
      npolys    <- length(poly.list)
      
      lat.long.df <- tibble(
        object_id = 1:npolys %>% as.factor(),
        coord.df  = poly.list
        ) %>%
        unnest(coord.df) %>%
        rename(
        longitude = lon,
        latitude  = lat
        )
      
    } else if (type == "line" & !is.null(read.df$osm_lines)) {
      
      line.list <- do.call(list, st_geometry(read.df$osm_lines)) %>%
        lapply(., rbind) %>%
        lapply(., as.tibble)
      nlines    <- length(line.list)
      
      lat.long.df <- tibble(
        object_id = 1:nlines %>% as.factor(),
        coord.df  = line.list
        ) %>%
        unnest(coord.df) %>%
        rename(
        longitude = lon,
        latitude  = lat
        )
    } else {
      ifelse(is.null(read.df$osm_points), stop("key/value incorrect or object type is empty"), 
             ifelse(!(type %in% c("point", "polygon", "line")), stop("type is not point, polygon, or line"),
                    stop("object type is empty, try another")))
    }
  
  return(lat.long.df)

}
  1. Define the function for mapping
#Boundary and outline of Fairfax County
fairfax.gg <- function() {
fairfax.box <- getbb("fairfax county")
fairfax.boundary <- getbb("fairfax county", format_out = "polygon") %>%
  as.tibble() %>%
  rename(longitude = `V1`, latitude = `V2`)

#Grab the map info (many varieties)
fairfax.map <- get_map(location = fairfax.box, source="stamen", maptype="watercolor", crop = TRUE)

#ggmap and ggplot map and boundary
ff.map <- ggmap(fairfax.map) +
  geom_polygon(data = fairfax.boundary, aes(x = longitude, y = latitude), colour = "black", size = 1, alpha = 0.1) +
  labs(
    x = "Longitude",
    y = "Latitude"
  )
  return(ff.map)
}

#Call
ff.map <- fairfax.gg()
## Warning: `as.tibble()` is deprecated, use `as_tibble()` (but mind the new semantics).
## This warning is displayed once per session.
## Source : http://tile.stamen.com/terrain/11/582/782.png
## Source : http://tile.stamen.com/terrain/11/583/782.png
## Source : http://tile.stamen.com/terrain/11/584/782.png
## Source : http://tile.stamen.com/terrain/11/585/782.png
## Source : http://tile.stamen.com/terrain/11/582/783.png
## Source : http://tile.stamen.com/terrain/11/583/783.png
## Source : http://tile.stamen.com/terrain/11/584/783.png
## Source : http://tile.stamen.com/terrain/11/585/783.png
## Source : http://tile.stamen.com/terrain/11/582/784.png
## Source : http://tile.stamen.com/terrain/11/583/784.png
## Source : http://tile.stamen.com/terrain/11/584/784.png
## Source : http://tile.stamen.com/terrain/11/585/784.png
## Source : http://tile.stamen.com/terrain/11/582/785.png
## Source : http://tile.stamen.com/terrain/11/583/785.png
## Source : http://tile.stamen.com/terrain/11/584/785.png
## Source : http://tile.stamen.com/terrain/11/585/785.png

TESTING VALUES *Google Maps used for proxy versus reality comparisons

  1. Fast Food
obj <- osm_to_df(key = "amenity", value = "fast_food", type = "point")
## Warning: `as_tibble.matrix()` requires a matrix with column names or a `.name_repair` argument. Using compatibility `.name_repair`.
## This warning is displayed once per session.
#Add locations of fast food
osm_point_plot <- function(ff.map, obj, value) {
  if (!is.data.frame(obj)) stop("OSM object is not a data frame")
  if (!is.ggplot(ff.map))  stop("Baseline Fairfax map is not a ggplot")
  ff.map +
  geom_point(data = obj, aes(x = longitude, y = latitude),
             size = 0.1, colour = "red", alpha = 0.25) + 
  geom_density_2d(data = obj, aes(x = longitude, y = latitude),
                  colour = "purple", alpha = 0.5) +
  labs(title = sprintf("Fairfax County %s", value))
  }

#Visualize
osm_point_plot(ff.map, obj, "fast_food")
## Warning: Removed 2 rows containing non-finite values (stat_density2d).
## Warning: Removed 2 rows containing missing values (geom_point).

Findings:

Trends or connections: Definitely pockets of concentration across the county, but overall availability is generally spread around Fairfax County.

Usable data (Y/N/M): Y - numerous data points generally in line with density in county.

Proxy versus reality: In comparison with Google Maps, data points seem mostly accurate, but Google Maps does not have as much of a concentration in northern area or a dispersion of points in southern area as the OSM does. Overall, OSM has more data points.

  1. Supermarkets
obj <- osm_to_df(key = "shop", value = "supermarket", type = "point")
osm_point_plot(ff.map, obj, "supermarket")

Findings:

Trends or connections: Supermarket access is relatively evenly distributed in dense areas, while less dense areas have few to no supermarkets. Additionally, a greater concentration appears to exist in the central-western area.

Usable data (Y/N/M): Y - Data seems accurate, and correlation between supermarkets and density comparatively results in food deserts in certain parts of the county.

Proxy versus reality: Proxy data appears relatively similar to reality, but, similar to fast food, seems to have some more data points.

  1. Healthy Food Stores
#obj <- osm_to_df(key = "shop", value = "health_food", type = "point")

#osm_point_plot(ff.map, obj, "health_food")

Findings: Object is empty: no health food stores.

Usable data: N

Proxy versus reality: There are fewer than 10 health food stores that pop up on Google Maps. It is likely that OSM classifies these data points as green grocers.

  1. Green Grocers
obj <- osm_to_df(key = "shop", value = "greengrocer", type = "point")
osm_point_plot(ff.map, obj, "greengrocer")

obj$object_id %>% unique() %>% length()
## [1] 7

Findings:

Trends or connections: Only 7 points across the county, which are spread out in central-eastern area of Fairfax County (likely bleeding into Fairfax City).

Usable data (Y/N/M): M - 7 data points that are not concentrated in any notable manner do not seem conclusive enough to include in our overall analysis. However, it may be useful to combine with supermarkets.

Proxy versus reality: Google Maps places more green grocers in the central-western area (but qualifies a wider array of supermarkets as green grocers).*

  1. Convenience Stores
obj <- osm_to_df(key = "shop", value = "convenience", type = "point")
osm_point_plot(ff.map, obj, "convenience")

Findings:

Trends or connections: Relatively scattered pockets. Consistent convenience stores exist from the northern to western border. Also, a wealth of convenience stores exists in the eastern region of Fairfax County, as well as along center of the county (likely spilling into Fairfax City).

Usable data (Y/N/M): Y - OSM characterizes a convenience store as a “local shop carrying a small subset of the items you would find in a supermarket.” Good data that adds more color to the distribution of supermarkets in the area, and could be combined with supermarket value.

Proxy versus reality: Google Maps data does not reflect trend up W to N border, but has the same general scattering of the central E-W part of the county.

Note: Although OSM describes convenience stores as a smaller version of a supermarket, I tend to think of these shops as leaning more heavily towards alcohol sales. An example of a convenience store on Google Maps is 7-Eleven. To me, this does not equate to a supermarket alternative, but is also not solely an alcohol store. Therefore, I am not sure it makes sense to group convenience stores with just supermarkets or with just alcohol stores, but perhaps as some hybrid alternative.

  1. Alcohol Stores
obj <- osm_to_df(key = "shop", value = "alcohol", type = "point")
osm_point_plot(ff.map, obj, "alcohol")

Findings:

Trends or connections: There is a northwestern and eastern sprinkling of alcohol stores. Other areas have few to none.

Usable data (Y/N/M): M - This is an important value to include, but, in general, it seems as though there should be more data points for alcohol stores in Fairfax County.

Proxy versus reality: More alcohol stores are visible on Google Maps, but there is a similar concentration in eastern and slightly in western region.*

  1. Bars
obj <- osm_to_df(key = "amenity", value = "bar", type = "point")
osm_point_plot(ff.map, obj, "bar")

obj$object_id %>% unique() %>% length()
## [1] 114

Findings:

Trends or connections: Bars are widely dispersed across county. No heavy concentrations within Fairfax County, but some directly outside of boundaries

Usable data (Y/N/M): M - This could be used in conjunction with other values, as there is not a whole lot of data here to use on its own.

Proxy versus reality: Google Maps brings up a concentration of bars in central eastern area of Fairfax (part of which likely falls in Fairfax City). Therefore, Google Maps data is not as dispersed as OSM data. Additionally, Google Maps data has more data points than OSM.

  1. Restaurants
obj <- osm_to_df(key = "amenity", value = "restaurant", type = "point")
osm_point_plot(ff.map, obj, "restaurant")
## Warning: Removed 6 rows containing non-finite values (stat_density2d).
## Warning: Removed 6 rows containing missing values (geom_point).

Findings:

Trends or connections: Concentrations of restaurants in denser areas of the county.

Usable data (Y/N/M): Y - This data looks accurate, is relevant to our project, and correlates to county density.

Proxy versus reality: The proxy data is relatively similar to reality, but OSM data shows more points than that of Google Maps.

  1. Food Courts
obj <- osm_to_df(key = "amenity", value = "food_court", type = "point")
osm_point_plot(ff.map, obj, "food_court")

Findings: None visible in the County. Not relevant.

Trends or connections: N/A

Usable data (Y/N/M): N

Proxy versus reality: Google Maps has some data points, but these seem more like restaurants than traditional food courts.

  1. Pubs
obj <- osm_to_df(key = "amenity", value = "pub", type = "point")
osm_point_plot(ff.map, obj, "pub")

Findings:

Trends or connections: Small concentrations in lower Eastern and N-W area.

Usable data (Y/N/M): M - Might be useful if compiled with bars data, but not enough information on its own.

Proxy versus reality: a lot more pubs come up on Google Maps, but Google Maps likely characterizes pubs more broadly. Therefore, the Google Maps data likely includes some data that would be captured by the value “bars” in OSM.

  1. Beverages
obj <- osm_to_df(key = "shop", value = "beverages", type = "point")
osm_point_plot(ff.map, obj, "beverages")

Findings:

Trends or connections: N/A

Usable data (Y/N/M): N - Seems to be fewer than 5 data points in county. This could have been useful, as OSM defines these shops as those selling both alcoholic and non-alcoholic beverages, but it is not worth including, as there are so few points.

Proxy versus reality: a lot more data points pop up on Google Maps (the parameters for the search term “beverage” are probably much broader on Google Maps than those on OSM).*

ISSUES:

  1. Google Maps as a reality comparison: Google Maps often does not give all of a searched for value in Fairfax County, but instead gives outputs along E-W line across of the county (concentrated around Fairfax City), leaving out northern and southern areas in the process.

*For certain values, I generalized the Google Maps reality comparison search term to Fairfax County instead of “Fairfax County,” as the latter returned no results when paired with certain values. The lack of quotations likely construed the reality comparison to concentrate in Fairfax City.(EX: green grocers Fairfax County instead of “green grocers Fairfax County,” or a variation of the latter)